require("dplyr")
library("haven")

## Set the working directory to the Dataverse folder before running:
## the data path used below is relative to it.

# Load the raw responses for Harris study no. 38512 from the SPSS (.sav) file.
# The path is relative, so the working directory must already be set.
survey <- read_sav("Harris_Data/Harris 2010 Public Opinion Survey, study no. 38512/harris_38512.sav")


# pid: sequential row identifier (1, 2, ..., number of rows).
# seq_len() is used instead of 1:nrow() so that an empty table yields an
# empty id vector rather than c(1, 0).
survey$pid <- seq_len(nrow(survey))

# study: study number, stored as text
survey$study <- "38512"

# study year (year)
survey$year <- 2010

# geographic data (urban): text of the q105 value labels
table(survey[["q105"]])

survey[["urban"]] <- as.character(as_factor(survey[["q105"]]))

# geographic data (region): text of the regn1 value labels, with everything
# from the first whitespace-hyphen-whitespace separator onward removed
survey[["region"]] <- gsub("\\s-\\s.*$", "",
                           as.character(as_factor(survey[["regn1"]])))
table(survey[["region"]])

# respondent head of household (hh): placeholder column, all NA
survey[["hh"]] <- NA

# increasing inequality (inequality): text of the q6062 value labels
survey[["inequality"]] <- as.character(as_factor(survey[["q6062"]]))
# (disabled) recode of "Refused" answers to NA:
#survey$inequality[survey$inequality == "Refused"] <- NA
table(survey[["inequality"]])

# inequality variable (inequality.variable): constant 1 on every row
survey[["inequality.variable"]] <- 1

# The columns below are created as all-NA placeholders.

# union membership (union.self, union.other)
survey[["union.self"]] <- NA
survey[["union.other"]] <- NA

# employment (employed)
survey[["employed"]] <- NA

# self-employment (employed.self)
survey[["employed.self"]] <- NA

# occupation
survey[["occupation"]] <- NA

# own occupation (occupation.self)
survey[["occupation.self"]] <- NA

# household size (hhsize)
# Three component counts are read from q204, q1035 and q1040 and stored as
# hhsize_over18, hhsize_1317 and hhsize_under13. In each one the codes 98 and
# 99 are recoded to NA.
# NOTE(review): 98/99 are presumably "not sure"/"declined" codes - confirm
# against the codebook.
table(survey$q204)
survey$hhsize_over18 <- as.numeric(survey$q204)
survey$hhsize_over18[survey$hhsize_over18 %in% c(98, 99)] <- NA
table(survey$q1035)
survey$hhsize_1317 <- as.numeric(survey$q1035)
survey$hhsize_1317[survey$hhsize_1317 %in% c(98, 99)] <- NA
table(survey$q1040)
survey$hhsize_under13 <- as.numeric(survey$q1040)
survey$hhsize_under13[survey$hhsize_under13 %in% c(98, 99)] <- NA

# Total household size: row-wise sum of the three components, ignoring
# missing ones. Computed in one vectorized call instead of looping over rows
# (no per-row tibble slicing, no progress printing, no scratch globals).
# unname() keeps the result a plain unnamed numeric vector.
survey$hhsize <- unname(rowSums(
  survey[, c("hhsize_over18", "hhsize_1317", "hhsize_under13")],
  na.rm = TRUE
))
# With na.rm = TRUE a row whose components are all missing sums to 0;
# a total of 0 is therefore treated as missing.
survey$hhsize[survey$hhsize == 0] <- NA
table(survey$hhsize)

# education (educ): text of the q216 value labels;
# the raw and recoded distributions are tabulated before and after
table(survey[["q216"]])
survey[["educ"]] <- as.character(as_factor(survey[["q216"]]))
table(survey[["educ"]])

# household income
## NOTE: the income recoding was left unfinished here
# income1 / income2: text of the q233 and q235 value labels
survey[["income1"]] <- as.character(as_factor(survey[["q233"]]))
survey[["income2"]] <- as.character(as_factor(survey[["q235"]]))
# number of rows where both income items are non-missing
sum(!(is.na(survey[["income1"]]) | is.na(survey[["income2"]])))
# income: income1 where it is present, otherwise income2
survey[["income"]] <- ifelse(!is.na(survey[["income1"]]),
                             survey[["income1"]],
                             survey[["income2"]])
table(survey[["income"]])
# cross-tabulate both items for rows whose income1 is "Decline to answer"
local({
  declined <- survey[["income1"]] == "Decline to answer"
  table(survey[["income1"]][declined],
        survey[["income2"]][declined])
})

# age: text of the q1030 value labels
survey[["age"]] <- as.character(as_factor(survey[["q1030"]]))
table(survey[["age"]])

# race
# race1 / race2: text of the q238 and q236 value labels
survey[["race1"]] <- as.character(as_factor(survey[["q238"]]))
survey[["race2"]] <- as.character(as_factor(survey[["q236"]]))
# race is derived in steps (a missing race1, or a missing race2 for a
# "White" row, propagates as NA):
#   race1 declined / not sure           -> "Decline to answer/not sure"
#   race1 "White", race2 declined       -> "Decline to answer/not sure"
#   race1 "White", race2 "No, not ..."  -> "Non-Hispanic White"
#   race1 "White", any other race2      -> "Hispanic White"
#   any other race1                     -> "Non-White"
survey[["race"]] <- local({
  race_unsure <- survey[["race1"]] == "Decline to answer" |
    survey[["race1"]] == "Not sure"
  origin_unsure <- survey[["race2"]] == "Decline to answer" |
    survey[["race2"]] == "Not sure"
  not_hispanic <- survey[["race2"]] ==
    "No, not of Spanish, Hispanic, or Latino origin or descent"

  # label that applies to a row if its race1 is "White"
  white_label <- ifelse(origin_unsure,
                        "Decline to answer/not sure",
                        ifelse(not_hispanic,
                               "Non-Hispanic White", "Hispanic White"))

  ifelse(race_unsure,
         "Decline to answer/not sure",
         ifelse(survey[["race1"]] == "White", white_label, "Non-White"))
})
table(survey[["race"]])
# checks: race1 among rows with "Yes, ..." on race2, and race2 among
# rows with "White" on race1
table(survey[["race1"]][survey[["race2"]] == "Yes, of Spanish, Hispanic, or Latino origin or descent"])
table(survey[["race2"]][survey[["race1"]] == "White"])

# politics (party): text of the q1500 value labels
survey[["party"]] <- as.character(as_factor(survey[["q1500"]]))
table(survey[["party"]])

# politics (ideology): text of the q1520 value labels
survey[["ideology"]] <- as.character(as_factor(survey[["q1520"]]))

# gender: text of the q1020 value labels
survey[["gender"]] <- as.character(as_factor(survey[["q1020"]]))

# religion: placeholder column, all NA
survey[["religion"]] <- NA

# factuals: placeholder columns, all NA
survey[["factual1"]] <- NA
survey[["factual2"]] <- NA
survey[["factual3"]] <- NA

## alienation index items: text of the q6061, q6063 and q6064 value labels
survey[["dontcare"]] <- as.character(as_factor(survey[["q6061"]]))
survey[["dontcount"]] <- as.character(as_factor(survey[["q6063"]]))
survey[["leftout"]] <- as.character(as_factor(survey[["q6064"]]))


## question place: constant label on every row
survey[["question_place"]] <- "before party"


# subset: keep only the harmonised columns, in this fixed order
survey_38512 <- survey[c(
  # identifiers
  "pid", "study", "year",
  # geography and household head
  "urban", "region", "hh",
  # inequality item
  "inequality", "inequality.variable",
  # union, employment, occupation
  "union.self", "union.other",
  "employed", "employed.self",
  "occupation", "occupation.self",
  # demographics
  "hhsize", "educ", "income",
  "age", "race",
  "party", "ideology",
  "gender", "religion",
  # factual items
  "factual1", "factual2", "factual3",
  # alienation items
  "dontcare", "dontcount", "leftout",
  # question placement
  "question_place"
)]

# save file (left disabled)
#saveRDS(survey_38512, file = "Harris_Data/survey_38512.rds")
